# Build a document-by-country mention-count table (mx) from the cached
# country-name matches, then derive the subject factor and year columns of X.

# NOTE(review): wiping the workspace is kept so the script behaves as before,
# but it also deletes every object the caller had defined.
rm(list = ls(all.names = TRUE))
pacman::p_load(readr, dplyr, ggplot2, stringr, maps, Matrix, d3heatmap, plotly, googleVis)

# X supplies the `sub` and `date` columns used below
# (presumably one row per document -- confirm against data/X.rdata).
load("data/X.rdata")

# CX is a cached list of country-name matches, one element per document.
# It was produced once by the commented-out code below.
# pat = paste(iso3166$ISOname, collapse="|")
# CX = str_extract_all(X$text, regex(pat, ignore.case=TRUE))
# save(CX, file="data/CX.rdata")
load("data/CX.rdata")

# N9: names matched more than 9 times overall, most frequent first.
N9 = unlist(CX) %>% table %>% sort(decreasing = TRUE) %>% {.[. > 9]} %>% names

# C9: for every document, the positions of its matches within N9
# (NA for names that did not make it into N9).
C9 = lapply(CX, match, N9)

# Long (document, country) index pairs, built in one vectorised step instead
# of row-binding one small data frame per document.
df = data.frame(
  i = rep(seq_along(C9), lengths(C9)),
  j = unlist(C9, use.names = FALSE)
)
df = subset(df, complete.cases(df))  # drop matches outside N9
df$x = 1

# sparseMatrix() adds up x over repeated (i, j) pairs, so each cell holds the
# number of mentions. The shape is stated explicitly so that documents with no
# mention at the end of the corpus still get a row, and the row count follows
# the data instead of a hard-coded constant.
mx = sparseMatrix(
  i = df$i, j = df$j, x = df$x,
  dims = c(length(C9), length(N9)),
  dimnames = list(seq_along(C9), N9)
) %>%
  as.data.frame.matrix

# Order the subject levels by decreasing frequency.
z = table(X$sub) %>% sort(decreasing = TRUE) %>% names
X$sub = factor(X$sub, levels = z)

# Year as a character string (assumes X$date is a Date/POSIXct -- TODO confirm).
X$year = format(X$date, "%Y")
# No. of Mentions per Subject/Country ----
# Total mentions of each country within each subject.
# mx is reused as built at the top of the script; rebuilding it here only
# repeated that construction together with its hard-coded row count.
# vapply() pins the result to one numeric vector of length ncol(mx) per
# subject, giving a countries-by-subjects matrix.
A = vapply(split(mx, X$sub), colSums, numeric(ncol(mx)))

# Subjects in rows, the first 12 countries in columns.
t(A)[, 1:12]
## Germany Denmark China Netherlands Taiwan France
## Business & Finance 362 208 248 202 231 190
## R&D 138 148 334 62 45 80
## Grid Connection 296 120 30 112 8 69
## Authorities 133 113 64 92 105 83
## Technology 91 74 93 43 11 45
## Operations & Maintenance 119 67 24 60 20 14
## Vessels 66 41 54 51 24 12
## Training & Education 68 51 11 27 18 5
## Contracts & Tenders 54 33 17 58 65 4
## Environment 16 12 10 14 18 1
## Ports & Logistics 32 24 3 18 17 7
## Jobs & Recruitment 2 6 3 2 4 0
## Industry Contribution 6 1 4 5 19 2
## Wind Farm Update 1 1 1 1 3 0
## Japan Norway Ireland Belgium India United States
## Business & Finance 185 107 70 95 50 34
## R&D 117 64 70 28 140 102
## Grid Connection 22 95 37 61 4 7
## Authorities 50 50 157 18 36 65
## Technology 79 34 28 21 19 14
## Operations & Maintenance 8 32 19 23 16 7
## Vessels 6 32 3 25 0 6
## Training & Education 10 6 23 0 6 7
## Contracts & Tenders 18 8 0 14 8 3
## Environment 2 6 6 2 15 10
## Ports & Logistics 4 0 13 12 0 2
## Jobs & Recruitment 0 0 3 0 0 1
## Industry Contribution 5 3 0 1 1 1
## Wind Farm Update 0 0 0 3 0 0
# Interactive heat map of the subject-by-country counts for the first 12
# countries. Row and column reordering is switched off, so the heat map keeps
# the row/column order of the table. Arguments are named in full rather than
# passed positionally or by partial match.
t(A)[, 1:12] %>%
  as.data.frame.matrix %>%
  d3heatmap(Rowv = FALSE, Colv = FALSE, colors = "Greens")
# Subjects by Country by Year ----
# For each of the first 8 countries in mx, count documents by subject and year
# (2011-2018) among the documents that mention the country. Keep the first 8
# subject levels and stack the transposed (year x subject) tables.
df = local({
  # Fixing the year levels guarantees exactly 8 rows per country, even when a
  # country has no mention in some year. Without this the stacked table would
  # come out short and no longer line up with the 8 x 8 country/year labels
  # attached to it afterwards.
  year_levels = as.character(2011:2018)

  stacked = do.call(rbind, lapply(names(mx)[1:8], function(country) {
    # X$year holds character strings, so membership is tested against the
    # string levels rather than compared with numbers.
    docs = X[mx[[country]] > 0 & X$year %in% year_levels, ]
    docs$year = factor(docs$year, levels = year_levels)
    t(xtabs(~ sub + year, docs)[1:8, ])
  }))

  # Every country contributes the same year labels as row names. Clear them so
  # data.frame() does not warn about duplicated row names.
  rownames(stacked) = NULL

  # data.frame() makes the subject names syntactic (e.g. "R&D" becomes R.D).
  data.frame(stacked)
})
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
## --> row.names NOT used
# Label the stacked counts: 8 consecutive rows per country (in the column
# order of mx), each run covering the years 2011 to 2018, followed by a
# per-row total over the subject columns.
df = local({
  top8 = names(mx)[1:8]
  labelled = data.frame(
    country = factor(rep(top8, each = 8), levels = top8),
    year = rep(2011:2018, times = 8),
    df
  )
  # Columns 3 to 10 are the subject counts that follow country and year.
  labelled$Total = rowSums(labelled[, 3:10])
  labelled
})
# Interactive Line Plot ----
# Yearly R&D counts (column R.D of df), one line per country. The static
# ggplot is handed to ggplotly() to make it interactive.
(
  ggplot(df, aes(x = year, y = R.D, colour = country)) +
    geom_line(lwd = 1) +
    ggtitle("No. Mention in R&D Sub-Category, Top 8 Countries")
) %>%
  ggplotly()
# Fully Interactive Motion Bubble Plot ----
# Set the googleVis plot tag to "chart" (presumably so plot() writes the chart
# markup inline, as for knitted output -- confirm against the googleVis docs).
# The previous option values are kept in `op`; nothing visible here restores them.
op = options(gvis.plot.tag = "chart")

# Motion chart of df with one bubble per country, animated over year.
# NOTE(review): Google motion charts may depend on Flash -- confirm that this
# still renders in current browsers.
plot(
  gvisMotionChart(
    df,
    idvar = "country",
    timevar = "year",
    options = list(width = 800, height = 600)
  )
)